library(tidyverse)
library(cowplot)
library(GGally)
library(heatmaply)
library(sva)
library(limma)
library(biobroom)
library(ggridges)
# Cells #
vf.cell.neg.raw <- read_csv("./data/abundances/vpa_fa_1and2_cells_target_negmode.csv")
vf.cell.pos.raw <- read_csv("./data/abundances/vpa_fa_1and2_cells_target_posmode.csv")
# Media #
vf.med.neg.raw <- read_csv("./data/abundances/vpa_fa_1and2_media_target_negmode.csv")
vf.med.pos.raw <- read_csv("./data/abundances/vpa_fa_1and2_media_target_posmode.csv")
# Cells #
vf.cell.neg.compound.info <- read_csv("./data/compound_info/vpa_fa_1and2_cells_target_negmode_cmpnd_info.csv")
vf.cell.pos.compound.info <- read_csv("./data/compound_info/vpa_fa_1and2_cells_target_posmode_cmpnd_info.csv")
# Media #
vf.med.neg.compound.info <- read_csv("./data/compound_info/vpa_fa_1and2_media_target_negmode_cmpnd_info.csv")
vf.med.pos.compound.info <- read_csv("./data/compound_info/vpa_fa_1and2_media_target_posmode_cmpnd_info.csv")
# Kegg/other ID reference #
cmpnd.id.db <- read_csv("./data/other/anp_db_compound_kegg.csv")
vf.cell.other.info <- read_csv("./data/other/vpa_fa_exp1_other_info.csv")
MissingPerSamplePlot <- function(raw.data, start.string) {
# Counts the number of missing/NA values per sample and
# percent compounds missing out of total number of compounds per sample
# Then passes the result into a vertical bar plot, where each
# bar represents a single sample and the size of the bar
# is the % of compounds missing
counted.na <- raw.data %>%
select(starts_with(start.string)) %>%
mutate(
count.na = apply(., 1, function(x) sum(is.na(x))),
percent.na = (count.na / ncol(raw.data %>% select(starts_with(start.string)))) * 100
) %>%
dplyr::select(count.na, percent.na) %>%
bind_cols(
raw.data %>%
select(Samples, Group)
) %>%
arrange(percent.na) %>%
mutate(f.order = factor(Samples, levels = Samples))
counted.na %>%
ggplot(aes(x = f.order, y = percent.na, fill = Group)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 20, color = "gray", size = 1, alpha = 0.8) +
coord_flip()+
xlab("Samples") +
ylab("Percent missing values in sample") +
theme(axis.text.y = element_text(size = 6))
}
MissingPerCompound <- function(raw.data, start.string){
# Function to count in how many experimental samples each compound is missing
# Also calculates the % missing:
# (# NA per compound across all experimental samples) * 100 / (tot num of samples)
raw.data %>%
filter(Group == "sample") %>%
select(Samples, starts_with(start.string)) %>%
gather(key = "Compound", value = "Abundance", -Samples) %>%
group_by(Compound) %>%
summarise(
na_count = sum(is.na(Abundance)),
n_samples = n(),
percent_na = (na_count * 100 / n_samples)
) %>%
filter(na_count > 0) %>%
arrange(desc(percent_na))
}
ReplaceNAwMinLogTransformMult <- function(raw.dataframe, start.prefix) {
# Function to replace any NAs in each column with the minimum for that column,
# separately for each sample type.
# NA in negative control samples are replaced by 2.
# Then data is log2 transformed
# experimental samples #
smpls <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(starts_with(start.prefix))
smpls.min <- lapply(smpls, min, na.rm = TRUE)
smpls.noNA <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
smpls %>%
replace_na(replace = smpls.min) %>%
log2()
)
# QC samples #
QC <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(starts_with(start.prefix))
QC.min <- lapply(QC, min, na.rm = TRUE)
# replace the missing values in the QC with the minimum of the QC
# then take the log
QC.noNA <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
QC %>%
replace_na(replace = QC.min) %>%
log2()
)
# not samples or QC #
other.min <- setNames(
as.list(
rep(2, ncol(
raw.dataframe %>%
dplyr::select(starts_with(start.prefix))))
),
colnames(raw.dataframe %>% dplyr::select(starts_with(start.prefix)))
)
other.num.log <- raw.dataframe %>%
filter(Group != "QC" & Group != "sample") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
raw.dataframe %>%
filter(Group != "QC" & Group != "sample") %>%
dplyr::select(starts_with(start.prefix)) %>%
replace_na(replace = other.min) %>%
log2()
)
all.noNA <- smpls.noNA %>%
bind_rows(QC.noNA) %>%
bind_rows(other.num.log)
}
HeatmapPrepAlt <- function(raw.data, start.prefix){
# function for preparing dara for heatmap viz
x <- raw.data %>%
select(starts_with(start.prefix)) %>%
scale(center = TRUE, scale = TRUE) %>%
as.matrix()
row.names(x) <- raw.data$Samples
return(x)
}
Q: What are the distributions of compound masses and retention times?
full.vf.cmpnd <- vf.cell.neg.compound.info %>%
mutate(Set = "Cells / Neg") %>%
bind_rows(vf.cell.pos.compound.info %>% mutate(Set = "Cells / Pos")) %>%
bind_rows(vf.med.neg.compound.info %>% mutate(Set = "Media / Neg")) %>%
bind_rows(vf.med.pos.compound.info %>% mutate(Set = "Media / Pos"))
full.vf.cmpnd %>%
ggplot(aes(x = rt, y = mass)) +
geom_point(size = 3, alpha = 0.3) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA + FA Exp") +
ylim(0, 1000)
full.vf.cmpnd %>%
ggplot(aes(x = rt, y = mass, color = Set)) +
geom_point(size = 3, alpha = 0.8) +
xlab("Retnetion Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA + FA Exp") +
facet_wrap(~ Set) +
ylim(0, 1000)
Q: Which compounds were found in one or more of the data types?
## cell join ##
vf.cell.cmpnd.join <- vf.cell.neg.compound.info %>%
inner_join(vf.cell.pos.compound.info, by = "cas_id", suffix = c(".c.n", ".c.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / cells
print(vf.cell.cmpnd.join$compound_full.c.n)
[1] "Methylglyoxal"
[2] "Glycine"
[3] "Alanine"
[4] "Sarcosine"
[5] "GABA"
[6] "2-Aminobutyric Acid"
[7] "Serine"
[8] "Hypotaurine"
[9] "Uracil"
[10] "Proline"
[11] "Caproic Acid"
[12] "Valine"
[13] "Methylmalonic Acid"
[14] "Threonine"
[15] "Cysteine"
[16] "Taurine"
[17] "4-Oxoproline"
[18] "trans-4-Hydroxyproline"
[19] "Creatine"
[20] "Isoleucine"
[21] "Leucine"
[22] "Asparagine"
[23] "Aspartic Acid"
[24] "Adenine"
[25] "Hypoxanthine"
[26] "Glutamine"
[27] "Lysine"
[28] "Glutamic Acid"
[29] "Methionine"
[30] "Guanine"
[31] "Xanthine"
[32] "3-Sulfinoalanine"
[33] "Histidine"
[34] "Orotic Acid"
[35] "Allantoin"
[36] "Phenylalanine"
[37] "G3P"
[38] "Glycerol 1-Phosphate / Glycerol 3-Phosphate"
[39] "Arginine"
[40] "N-Carbamoyl-L-aspartic Acid"
[41] "Tyrosine"
[42] "D-Galactitol"
[43] "N-alpha-Acetylglutamine"
[44] "Glucuronic Acid"
[45] "Galacturonic Acid"
[46] "Tryptophan"
[47] "Phosphocreatine"
[48] "O-Succinylhomoserine"
[49] "Pantothenic Acid"
[50] "GalNAc"
[51] "Cystathionine"
[52] "Deoxycytidine"
[53] "10Z-Pentadecenoic Acid"
[54] "Uridine"
[55] "Palmitoleic Acid"
[56] "Ribothymidine"
[57] "Thiamine"
[58] "Inosine"
[59] "10Z-Heptadecenoic Acid"
[60] "Myoinositol-methyl-phosphate"
[61] "Oleic Acid"
[62] "Guanosine"
[63] "1-Monomyristin"
[64] "Glutathione"
[65] "Neu5Ac"
[66] "Arachidic Acid"
[67] "UMP"
[68] "3-Phosphoglyceroinositol"
[69] "AMP"
[70] "GMP"
[71] "SAH"
[72] "CDP"
[73] "ADP"
[74] "Folic Acid"
[75] "GDP"
[76] "LysoPE(18:0)"
[77] "dTTP"
[78] "CTP"
[79] "UTP"
[80] "dATP"
[81] "ATP"
[82] "GTP"
[83] "ADP-Ribose"
[84] "UDP-Glucuronic acid"
[85] "ADP-Glucose"
[86] "GDP-Glucose"
[87] "UDP-GalNAc"
[88] "UDP-GlcNAc"
[89] "GSSG"
[90] "NAD"
[91] "NADH"
[92] "NADP"
[93] "CoA"
[94] "PC(36:4)"
[95] "Acetyl-CoA"
# percent of cell / neg compounds found in cell / pos
round(nrow(vf.cell.cmpnd.join) * 100 / nrow(vf.cell.neg.compound.info), 1)
[1] 53.4
# percent of cell / neg compounds found in cell / pos
round(nrow(vf.cell.cmpnd.join) * 100 / nrow(vf.cell.pos.compound.info), 1)
[1] 56.5
vf.cell.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
vf.cell.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
### Media join ###
vf.med.cmpnd.join <- vf.med.neg.compound.info %>%
inner_join(vf.med.pos.compound.info, by = "cas_id", suffix = c(".m.n", ".m.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / media
print(vf.med.cmpnd.join$compound_full.m.n)
[1] "Glycine" "Alanine" "Serine"
[4] "Uracil" "Creatinine" "Valine"
[7] "Threonine" "Taurine" "4-Oxoproline"
[10] "Isoleucine" "Leucine" "Hypoxanthine"
[13] "Glutamine" "Lysine" "Glutamic Acid"
[16] "Methionine" "Allantoin" "Phenylalanine"
[19] "Pyridoxine" "Tyrosine" "D-Galactitol"
[22] "Pantothenic Acid" "Uridine" "Inosine"
[25] "Guanosine" "Folic Acid"
# percent of media / neg compounds found in media / pos
round(nrow(vf.med.cmpnd.join) * 100 / nrow(vf.med.neg.compound.info), 1)
[1] 49.1
# percent of media / pos compounds found in media / neg
round(nrow(vf.med.cmpnd.join) * 100 / nrow(vf.med.pos.compound.info), 1)
[1] 51
# vf all match
vf.all.cmpnd.join <- vf.cell.cmpnd.join %>%
inner_join(vf.med.cmpnd.join, by = "cas_id") %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
nrow(vf.all.cmpnd.join)
[1] 24
vf.all.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
vf.all.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
Q: Do any of the samples have greater than 20% missing (NA) compound abundances, out of all of the features in the dataset?
MissingPerSamplePlot(vf.cell.neg.raw, "VPA_FAnC") +
ggtitle("Missing Per Sample\nVPA + FA / Cells / Neg Mode")
MissingPerSamplePlot(vf.cell.pos.raw, "VPA_FApC") +
ggtitle("Missing Per Sample\nVPA + FA / Cells / Pos Mode")
MissingPerSamplePlot(vf.med.neg.raw, "VPA_FAnM") +
ggtitle("Missing Per Sample\nVPA + FA / Media / Neg Mode")
MissingPerSamplePlot(vf.med.pos.raw, "VPA_FApM") +
ggtitle("Missing Per Sample\nVPA + FA / Media / Pos Mode")
Q: Are any of the compounds more than 20% missing in the experimental sample group? If there are any, they will be excluded from analysis.
MissingPerCompound(vf.cell.neg.raw, "VPA_FAnC") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
(vf.cell.pos.cmpnd.to.excl <- MissingPerCompound(vf.cell.pos.raw, "VPA_FApC") %>%
filter(percent_na > 20))
# A tibble: 1 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPA_FApC80 23 24 95.8
(vf.med.neg.cmpnd.to.excl <- MissingPerCompound(vf.med.neg.raw, "VPA_FAnM") %>%
filter(percent_na > 20))
# A tibble: 4 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPA_FAnM49 24 24 100
2 VPA_FAnM50 24 24 100
3 VPA_FAnM51 24 24 100
4 VPA_FAnM53 6 24 25
(vf.med.pos.cmpnd.to.excl <- MissingPerCompound(vf.med.pos.raw, "VPA_FApM") %>%
filter(percent_na > 20))
# A tibble: 3 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPA_FApM37 24 24 100
2 VPA_FApM41 24 24 100
3 VPA_FApM43 24 24 100
vf.cell.neg.raw.grp.mean <- vf.cell.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPA_FAnC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.cell.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA / Cells / Negative Mode\nGrouped by sample type")
vf.cell.neg.raw.grp.mean.order <- vf.cell.neg.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.cell.neg.raw %>%
select(Samples, Group, starts_with("VPA_FAnC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA / Cells / Negative Mode")
vf.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA / Cells / Negative Mode")
vf.cell.neg.raw.grp.diff <- vf.cell.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vf.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
vf.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
vf.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-4, 13) +
ylim(-1, 13) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Neg Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vf.cell.neg.cmpnd.to.incl <- vf.cell.neg.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff))
# original number of metabolites
nrow(vf.cell.neg.raw.grp.diff)
[1] 178
# number of metabolites after filtering
nrow(vf.cell.neg.cmpnd.to.incl)
[1] 131
vf.cell.pos.raw.grp.mean <- vf.cell.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPA_FApC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.cell.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA / Cells / Positive Mode\nGrouped by sample type")
vf.cell.pos.raw.grp.mean.order <- vf.cell.pos.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.cell.pos.raw %>%
select(Samples, Group, starts_with("VPA_FApC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.raw.grp.diff <- vf.cell.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vf.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
vf.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
vf.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-5, 11) +
ylim(-1, 11) +
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Pos Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vf.cell.pos.cmpnd.to.incl <- vf.cell.pos.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff)) %>%
filter(!(Compound %in% vf.cell.pos.cmpnd.to.excl$Compound)) %>%
filter(!(Compound %in% vf.cell.pos.cmpnd.to.excl$Compound))
# original number of metabolites
nrow(vf.cell.pos.raw.grp.diff)
[1] 168
# filtered number
nrow(vf.cell.pos.cmpnd.to.incl)
[1] 120
vf.med.neg.raw.grp.mean <- vf.med.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPA_FAnM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.med.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA / Media / Negative Mode\nGrouped by sample type")
vf.med.neg.raw.grp.mean.order <- vf.med.neg.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vf.med.neg.raw %>%
select(Samples, Group, starts_with("VPA_FAnM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 2) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA / Media / Negative Mode")
vf.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA / Media / Negative Mode")
vf.med.neg.raw.grp.diff <- vf.med.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vf.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vf.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
vf.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-1, 4) +
ylim(-1, 7) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / Neg Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vf.med.neg.cmpnd.to.incl <- vf.med.neg.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>%
filter(!(Compound %in% vf.med.neg.cmpnd.to.excl$Compound))
nrow(vf.med.neg.raw.grp.diff)
[1] 53
nrow(vf.med.neg.cmpnd.to.incl)
[1] 43
vf.med.pos.raw.grp.mean <- vf.med.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPA_FApM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.med.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA / Media / Positive Mode\nGrouped by sample type")
vf.med.pos.raw.grp.mean.order <- vf.med.pos.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vf.med.pos.raw %>%
select(Samples, Group, starts_with("VPA_FApM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 2) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA / Media / Positive Mode")
vf.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA / Media / Positive Mode")
vf.med.pos.raw.grp.diff <- vf.med.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vf.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vf.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
vf.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-5, 1) +
ylim(-1, 7) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / pos Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vf.med.pos.cmpnd.to.incl <- vf.med.pos.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>%
filter(!(Compound %in% vf.med.pos.cmpnd.to.excl$Compound))
nrow(vf.med.pos.raw.grp.diff)
[1] 51
nrow(vf.med.pos.cmpnd.to.incl)
[1] 32
vf.cell.neg.noNA <- vf.cell.neg.raw %>%
select(Samples:Plate, one_of(vf.cell.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("VPA_FAnC")
vf.cell.pos.noNA <- vf.cell.pos.raw %>%
select(Samples:Plate, one_of(vf.cell.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("VPA_FApC")
vf.med.neg.noNA <- vf.med.neg.raw %>%
select(Samples:Plate, one_of(vf.med.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("VPA_FAnM")
vf.med.pos.noNA <- vf.med.pos.raw %>%
select(Samples:Plate, one_of(vf.med.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("VPA_FApM")
vf.cell.neg.noNA.gathered <- vf.cell.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.cell.neg.noNA) == "VPA_FAnC10"):ncol(vf.cell.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vf.cell.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA / Cells / Negative Mode")
# same data format, but as ridge plots
vf.cell.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA / Cells / Negative Mode")
# experimental samples only
vf.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA / Cells / Negative Mode")
# overlay the distributions for another look
vf.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA / Cells / Negative Mode")
vf.cell.pos.noNA.gathered <- vf.cell.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.cell.pos.noNA) == "VPA_FApC10"):ncol(vf.cell.pos.noNA)
)
vf.cell.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA / Cells / Positive Mode")
vf.med.neg.noNA.gathered <- vf.med.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.med.neg.noNA) == "VPA_FAnM1"):ncol(vf.med.neg.noNA)
)
vf.med.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA / Media / Negative Mode")
vf.med.pos.noNA.gathered <- vf.med.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.med.pos.noNA) == "VPA_FApM1"):ncol(vf.med.pos.noNA)
)
vf.med.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA / Media / Positive Mode")
Some plots to understand the relationship between the samples, QC samples, solvent, and empty samples in some cases.
### PCA on all Samples ###
vf.cell.neg.full.pca <- vf.cell.neg.noNA %>%
select(starts_with("VPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.full.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.full.pca.x <- as.data.frame(vf.cell.neg.full.pca$x)
row.names(vf.cell.neg.full.pca.x) <- vf.cell.neg.noNA$Samples
vf.cell.neg.full.pca.x <- vf.cell.neg.full.pca.x %>%
bind_cols(vf.cell.neg.noNA %>% select(Group:Plate))
vf.cell.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (91.7% Var)") +
ylab("PC2 (4.2% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Cells / Negative Mode")
### Samples and QC ###
vf.cell.neg.smpl.QC.pca <- vf.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA + FA / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.smpl.QC.pca.x <- as.data.frame(vf.cell.neg.smpl.QC.pca$x)
vf.cell.neg.smpl.QC.pca.x <- vf.cell.neg.smpl.QC.pca.x %>%
bind_cols(
vf.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.neg.smpl.QC.pca.x) <- vf.cell.neg.smpl.QC.pca.x$Samples
vf.cell.neg.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (32.3% Var)") +
ylab("PC2 (25.1% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Cells / Negative Mode")
vf.cell.neg.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (13.7% Var)") +
ylab("PC4 (6.5% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Cells / Negative Mode")
### Experimental Samples Only ###
vf.cell.neg.smpl.pca <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.smpl.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.smpl.pca.x <- as.data.frame(vf.cell.neg.smpl.pca$x)
vf.cell.neg.smpl.pca.x <- vf.cell.neg.smpl.pca.x %>%
bind_cols(
vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.neg.smpl.pca.x) <- vf.cell.neg.smpl.pca.x$Samples
vf.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (44.5% Var)") +
ylab("PC2 (19.3% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Cells / Negative Mode")
vf.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (10.4% Var)") +
ylab("PC4 (7.7% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Cells / Negative Mode")
### PCA on all Samples ###
vf.cell.pos.full.pca <- vf.cell.pos.noNA %>%
select(starts_with("VPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.full.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Cells / Positive Mode",
type = "b"
)
vf.cell.pos.full.pca.x <- as.data.frame(vf.cell.pos.full.pca$x)
row.names(vf.cell.pos.full.pca.x) <- vf.cell.pos.noNA$Samples
vf.cell.pos.full.pca.x <- vf.cell.pos.full.pca.x %>%
bind_cols(vf.cell.pos.noNA %>% select(Group:Plate))
vf.cell.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (92.0% Var)") +
ylab("PC2 (2.7% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Cells / Positive Mode")
### Samples and QC ###
vf.cell.pos.smpl.QC.pca <- vf.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA + FA / Cells / Positive Mode",
type = "b"
)
vf.cell.pos.smpl.QC.pca.x <- as.data.frame(vf.cell.pos.smpl.QC.pca$x)
vf.cell.pos.smpl.QC.pca.x <- vf.cell.pos.smpl.QC.pca.x %>%
bind_cols(
vf.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.pos.smpl.QC.pca.x) <- vf.cell.pos.smpl.QC.pca.x$Samples
vf.cell.pos.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_", remove = FALSE) %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (38.2% Var)") +
ylab("PC2 (20.0% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (12.0% Var)") +
ylab("PC4 (7.7% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Cells / Positive Mode")
### Experimental Samples Only ###
vf.cell.pos.smpl.pca <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.smpl.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Cells / Positive Mode",
type = "b"
)
vf.cell.pos.smpl.pca.x <- as.data.frame(vf.cell.pos.smpl.pca$x)
vf.cell.pos.smpl.pca.x <- vf.cell.pos.smpl.pca.x %>%
bind_cols(
vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.pos.smpl.pca.x) <- vf.cell.pos.smpl.pca.x$Samples
vf.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (49.4% Var)") +
ylab("PC2 (15.7% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (9.8% Var)") +
ylab("PC4 (7.6% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Cells / Positive Mode")
### PCA on all Samples ###
vf.med.neg.full.pca <- vf.med.neg.noNA %>%
select(starts_with("VPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.full.pca$sdev ^ 2) * 100 / sum(vf.med.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.full.pca.x <- as.data.frame(vf.med.neg.full.pca$x)
row.names(vf.med.neg.full.pca.x) <- vf.med.neg.noNA$Samples
vf.med.neg.full.pca.x <- vf.med.neg.full.pca.x %>%
bind_cols(vf.med.neg.noNA %>% select(Group:Plate))
vf.med.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (84.2% Var)") +
ylab("PC2 (5.2% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Media / Negative Mode")
### Samples and QC ###
vf.med.neg.smpl.QC.pca <- vf.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vf.med.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.smpl.QC.pca.x <- as.data.frame(vf.med.neg.smpl.QC.pca$x)
vf.med.neg.smpl.QC.pca.x <- vf.med.neg.smpl.QC.pca.x %>%
bind_cols(
vf.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.neg.smpl.QC.pca.x) <- vf.med.neg.smpl.QC.pca.x$Samples
vf.med.neg.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (49.3% Var)") +
ylab("PC2 (25.9% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Media / Negative Mode")
vf.med.neg.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.0% Var)") +
ylab("PC4 (5.0% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Media / Negative Mode")
### Experimental Samples Only ###
vf.med.neg.smpl.pca <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.smpl.pca$sdev ^ 2) * 100 / sum(vf.med.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.smpl.pca.x <- as.data.frame(vf.med.neg.smpl.pca$x)
vf.med.neg.smpl.pca.x <- vf.med.neg.smpl.pca.x %>%
bind_cols(
vf.med.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.neg.smpl.pca.x) <- vf.med.neg.smpl.pca.x$Samples
vf.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (53.5% Var)") +
ylab("PC2 (26.6% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Negative Mode")
vf.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.6% Var)") +
ylab("PC4 (3.6% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Negative Mode")
### PCA on all Samples ###
vf.med.pos.full.pca <- vf.med.pos.noNA %>%
select(starts_with("VPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.full.pca$sdev ^ 2) * 100 / sum(vf.med.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.full.pca.x <- as.data.frame(vf.med.pos.full.pca$x)
row.names(vf.med.pos.full.pca.x) <- vf.med.pos.noNA$Samples
vf.med.pos.full.pca.x <- vf.med.pos.full.pca.x %>%
bind_cols(vf.med.pos.noNA %>% select(Group:Plate))
vf.med.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (84.2% Var)") +
ylab("PC2 (5.2% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Media / Positive Mode")
### Samples and QC ###
vf.med.pos.smpl.QC.pca <- vf.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vf.med.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.smpl.QC.pca.x <- as.data.frame(vf.med.pos.smpl.QC.pca$x)
vf.med.pos.smpl.QC.pca.x <- vf.med.pos.smpl.QC.pca.x %>%
bind_cols(
vf.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.pos.smpl.QC.pca.x) <- vf.med.pos.smpl.QC.pca.x$Samples
vf.med.pos.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (62.4% Var)") +
ylab("PC2 (16.0% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Media / Positive Mode")
vf.med.pos.smpl.QC.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.1% Var)") +
ylab("PC4 (4.2% Var)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA + FA / Media / Positive Mode")
### Experimental Samples Only ###
vf.med.pos.smpl.pca <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.smpl.pca$sdev ^ 2) * 100 / sum(vf.med.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.smpl.pca.x <- as.data.frame(vf.med.pos.smpl.pca$x)
vf.med.pos.smpl.pca.x <- vf.med.pos.smpl.pca.x %>%
bind_cols(
vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.pos.smpl.pca.x) <- vf.med.pos.smpl.pca.x$Samples
vf.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA, label = row.names(vf.med.pos.smpl.pca.x))) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (50.1% Var)") +
ylab("PC2 (22.0% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode") +
geom_text(hjust = 1)
vf.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (16.8% Var)") +
ylab("PC4 (4.4% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode")
vf.med.pos.noNA <- vf.med.pos.noNA %>%
filter(Samples != "T1_P2_FA2")
### Experimental Samples Only ###
vf.med.pos.smpl.filt.pca <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.smpl.filt.pca$sdev ^ 2) * 100 / sum(vf.med.pos.smpl.filt.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.smpl.filt.pca.x <- as.data.frame(vf.med.pos.smpl.filt.pca$x)
vf.med.pos.smpl.filt.pca.x <- vf.med.pos.smpl.filt.pca.x %>%
bind_cols(
vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.pos.smpl.filt.pca.x) <- vf.med.pos.smpl.filt.pca.x$Samples
vf.med.pos.smpl.filt.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (52.1% Var)") +
ylab("PC2 (29.0% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode")
vf.med.pos.smpl.filt.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.5% Var)") +
ylab("PC4 (4.1% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode")
Is VPA metabolised by cells?
vf.med.neg.noNA %>%
select(Samples:Plate, VPA_FAnM27) %>%
unite(col = "Treatment_Group", Group, VPA, FA, sep = "_") %>%
ggplot(aes(Treatment_Group, VPA_FAnM27)) +
geom_jitter(alpha = 0.8, width = 0.1) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
vf.med.neg.noNA %>%
filter((Group == "sample" | Group == "empty") & VPA == "vpa") %>%
group_by(Group, VPA, FA) %>%
summarize(
vpa.avg = mean(VPA_FAnM27),
vpa.sd = sd(VPA_FAnM27)
)
# A tibble: 4 x 5
# Groups: Group, VPA [?]
Group VPA FA vpa.avg vpa.sd
<chr> <chr> <chr> <dbl> <dbl>
1 empty vpa cntrl 17.3 0.0517
2 empty vpa fa 17.3 0.0441
3 sample vpa cntrl 17.3 0.241
4 sample vpa fa 17.4 0.166
It seems likely that VPA is not getting metabolised to a great extent, but it is not possible to be sure.
Is folic acid metabolised by cells?
vf.med.neg.noNA %>%
select(Samples:Plate, VPA_FAnM52) %>%
unite(col = "Treatment_Group", Group, VPA, FA, sep = "_") %>%
ggplot(aes(Treatment_Group, VPA_FAnM52)) +
geom_jitter(alpha = 0.8, width = 0.1) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
vf.med.neg.noNA %>%
filter((Group == "sample" | Group == "empty") & FA == "fa") %>%
group_by(Group, VPA, FA) %>%
summarize(
fa.avg = mean(VPA_FAnM52),
fa.sd = sd(VPA_FAnM52)
)
# A tibble: 4 x 5
# Groups: Group, VPA [?]
Group VPA FA fa.avg fa.sd
<chr> <chr> <chr> <dbl> <dbl>
1 empty cntrl fa 14.6 0.214
2 empty vpa fa 14.4 0.0992
3 sample cntrl fa 14.4 0.370
4 sample vpa fa 14.6 0.361
# sample prep
vf.cell.neg.smpl.data <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.cell.neg.data.for.sva <- as.matrix(
vf.cell.neg.smpl.data[, which(
colnames(vf.cell.neg.smpl.data) == "VPA_FAnC10"
):ncol(vf.cell.neg.smpl.data)]
)
row.names(vf.cell.neg.data.for.sva) <- vf.cell.neg.smpl.data$Samples
vf.cell.neg.data.for.sva <- t(vf.cell.neg.data.for.sva)
# pheno prep
vf.cell.neg.data.pheno <- as.data.frame(vf.cell.neg.smpl.data[, 5:6])
row.names(vf.cell.neg.data.pheno) <- vf.cell.neg.smpl.data$Samples
# sva calculation
vf.cell.neg.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.cell.neg.data.pheno)
vf.cell.neg.mod0 <- model.matrix(~ 1, data = vf.cell.neg.data.pheno)
set.seed(2018)
num.sv(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, method = "be")
[1] 3
set.seed(2018)
num.sv(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.cell.neg.sv <- sva(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, vf.cell.neg.mod0)
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vf.cell.neg.surr.var <- as.data.frame(vf.cell.neg.sv$sv)
colnames(vf.cell.neg.surr.var) <- c("S1", "S2", "S3")
vf.cell.neg.covar <- vf.cell.neg.smpl.pca.x %>%
select(Samples, PC1:PC5) %>%
inner_join(
cbind(vf.cell.neg.data.pheno, vf.cell.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vf.cell.other.info, by = "Samples") %>%
as_tibble()
vf.cell.neg.covar %>%
select(Samples, Plate:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(Plate, value, color = Plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
summary(aov(S3 ~ Plate, data = vf.cell.neg.covar))
Df Sum Sq Mean Sq F value Pr(>F)
Plate 3 0.2328 0.07761 2.023 0.143
Residuals 20 0.7672 0.03836
vf.cell.neg.covar %>%
select(Samples, Treatment:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(Plate, value)) +
geom_point(size = 3, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vf.cell.neg.covar %>%
select(Samples:Plate) %>%
gather("PC", "value", PC1:PC5) %>%
ggplot(aes(Plate, value)) +
geom_boxplot() +
geom_point(size = 2, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vf.cell.neg.covar %>%
select(PC1:PC5, S1:S3) %>%
ggcorr(label = TRUE)
vf.cell.neg.covar %>%
select(PC1:PC5, S1:S3) %>%
ggpairs()
vf.cell.neg.covar %>%
ggplot(aes(PC2, S1)) +
geom_point(size = 2, alpha = 0.8)
vf.cell.neg.covar %>%
select(PC1:PC5, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vf.cell.neg.covar %>%
select(PC1:PC5, prot_conc_ug_uL:run_order) %>%
ggpairs()
vf.cell.neg.covar %>%
ggplot(aes(run_order, PC2, color = Plate)) +
geom_point(size = 4, alpha = 0.8)
vf.cell.neg.covar %>%
ggplot(aes(run_order, PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8)
vf.cell.neg.covar %>%
ggplot(aes(Treatment, prot_conc_ug_uL, color = Treatment)) +
geom_boxplot() +
geom_point(size = 2, alpha = 0.8)
summary(aov(prot_conc_ug_uL ~ Treatment, data = vf.cell.neg.covar))
Df Sum Sq Mean Sq F value Pr(>F)
Treatment 3 0.05925 0.019749 2.528 0.0865 .
Residuals 20 0.15625 0.007812
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vf.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC1, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
vf.cell.neg.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.cell.neg.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggpairs()
vf.cell.neg.covar %>%
ggplot(aes(run_order, S1, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
vf.cell.neg.covar %>%
ggplot(aes(run_order, S3, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
vf.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S2, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
vf.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S3, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vf.cell.neg.mod.vf) <- c("cntrl", "fa_only", "vpa_only", "fa_and_vpa")
# combine the full model matrix and the surrogate variables into one
vf.cell.neg.design.sv <- cbind(vf.cell.neg.mod.vf, vf.cell.neg.surr.var[, 1:2])
vf.cell.neg.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("cntrl", "fa_only", "vpa_only", "fa_and_vpa", "S1", "S2")
)
# fit the model/design matrix
vf.cell.neg.eb <- vf.cell.neg.data.for.sva %>%
lmFit(vf.cell.neg.design.sv) %>%
contrasts.fit(vf.cell.neg.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.cell.neg.eb.tidy <- tidy(vf.cell.neg.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.cell.neg.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlim(-2, 2) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / Cells / Negative Mode")
# select statistically significant hits with a certain FC:
vf.cell.neg.hits <- vf.cell.neg.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.cell.neg.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.cell.neg.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 29 36
# significant metabolites
sort(unique(vf.cell.neg.hits$compound_full))
[1] "2-Aminoadipic Acid" "2,4-diene-Valproic acid"
[3] "3-Phosphoglyceroinositol" "3-Sulfinoalanine"
[5] "5-Methyl-DHF" "ADP-Glucose"
[7] "AMP" "Aspartic Acid"
[9] "ATP" "CTP"
[11] "Cystathionine" "Cysteic Acid"
[13] "D-Galactitol" "dTDP"
[15] "dTTP" "G3P"
[17] "Galacturonic Acid" "GalNAc"
[19] "Glyceric Acid" "GMP"
[21] "GTP" "Guanosine"
[23] "Inosine" "Ketoleucine"
[25] "Lactic Acid" "myo-Inositol"
[27] "N-Acetylalanine" "N-Acetylaspartic Acid"
[29] "N-Acetylserine" "NADH"
[31] "Neu5Ac" "Proline"
[33] "Thiosulfate" "Thymidine"
[35] "UMP" "Uridine"
[37] "UTP"
vf.cell.neg.hits.tally2 <- vf.cell.neg.hits %>%
group_by(compound_short, compound_full) %>%
count() %>%
filter(n == 2)
vf.cell.neg.lowFA.hits <- vf.cell.neg.hits %>%
filter(term == "vpa_lowFA" & !(compound_short %in% vf.cell.neg.hits.tally2$compound_short))
vf.cell.neg.highFA.hits <- vf.cell.neg.hits %>%
filter(term == "vpa_highFA" & !(compound_short %in% vf.cell.neg.hits.tally2$compound_short))
vf.cell.neg.both.hits <- vf.cell.neg.hits %>%
filter(compound_short %in% vf.cell.neg.hits.tally2$compound_short) %>%
arrange(compound_short, term)
vf.cell.neg.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
ggplot(aes(vpa_lowFA, vpa_highFA)) +
geom_point(size = 2, alpha = 0.8) +
geom_abline(intercept = 0, slope = 1, color = "blue", alpha = 0.8)
vf.cell.neg.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
mutate(diff = vpa_highFA - vpa_lowFA) %>%
arrange(diff)
# A tibble: 28 x 4
compound_full vpa_highFA vpa_lowFA diff
<chr> <dbl> <dbl> <dbl>
1 5-Methyl-DHF 1.87 2.54 -0.673
2 2,4-diene-Valproic acid 1.81 2.32 -0.509
3 N-Acetylaspartic Acid 1.21 1.28 -0.0720
4 UTP 1.28 1.34 -0.0591
5 Aspartic Acid 1.41 1.47 -0.0547
6 dTDP 0.598 0.628 -0.0296
7 3-Sulfinoalanine 1.23 1.26 -0.0271
8 ADP-Glucose 2.89 2.91 -0.0255
9 Galacturonic Acid 0.597 0.611 -0.0147
10 N-Acetylserine 0.728 0.732 -0.00406
# ... with 18 more rows
#write_csv(path = "./results/vf_cell_neg_all_hits.csv", vf.cell.neg.hits)
### Plotting ###
vf.cell.neg.gathered <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vf.cell.neg.surr.var) %>%
select(Samples, VPA, FA, S1:S2, starts_with("VPA_FAnC")) %>%
gather(key = "Compound", value = "Abundance", VPA_FAnC10:VPA_FAnC99)
vf.cell.neg.nested <- vf.cell.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.cell.neg.modSV.resid <- vf.cell.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
vf.cell.neg.modSV.resid %>%
select(Samples:FA, one_of(unique(vf.cell.neg.hits$compound_short))) %>%
HeatmapPrepAlt("VPA_FAnC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA + FA Experiment / Cells / Neg Mode",
margins = c(50, 50, 75, 30),
k_col = 2, k_row = 2,
labRow = unique(vf.cell.neg.hits$compound_full)
)
### PCA - cleaned data ###
vf.cell.neg.modSV.pca <- vf.cell.neg.modSV.resid %>%
select(starts_with("VPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vf.cell.neg.modSV.pca.x <- as.data.frame(vf.cell.neg.modSV.pca$x)
row.names(vf.cell.neg.modSV.pca.x) <- vf.cell.neg.modSV.resid$Samples
vf.cell.neg.modSV.pca.x <- vf.cell.neg.modSV.pca.x %>%
bind_cols(vf.cell.neg.modSV.resid %>% select(VPA:FA))
vf.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (56.3% Var)") +
ylab("PC2 (13.3% Var)")
vf.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (10.8% Var)") +
ylab("PC4 (4.7% Var)")
# sample prep
vf.cell.pos.smpl.data <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.cell.pos.data.for.sva <- as.matrix(
vf.cell.pos.smpl.data[, which(
colnames(vf.cell.pos.smpl.data) == "VPA_FApC10"
):ncol(vf.cell.pos.smpl.data)]
)
row.names(vf.cell.pos.data.for.sva) <- vf.cell.pos.smpl.data$Samples
vf.cell.pos.data.for.sva <- t(vf.cell.pos.data.for.sva)
# pheno prep
vf.cell.pos.data.pheno <- as.data.frame(vf.cell.pos.smpl.data[, 5:6])
row.names(vf.cell.pos.data.pheno) <- vf.cell.pos.smpl.data$Samples
# sva calculation
vf.cell.pos.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.cell.pos.data.pheno)
vf.cell.pos.mod0 <- model.matrix(~ 1, data = vf.cell.pos.data.pheno)
set.seed(2018)
num.sv(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, method = "be")
[1] 2
set.seed(2018)
num.sv(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.cell.pos.sv <- sva(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, vf.cell.pos.mod0)
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vf.cell.pos.surr.var <- as.data.frame(vf.cell.pos.sv$sv)
colnames(vf.cell.pos.surr.var) <- c("S1", "S2")
vf.cell.pos.covar <- vf.cell.pos.smpl.pca.x %>%
select(Samples, PC1:PC5) %>%
inner_join(
cbind(vf.cell.pos.data.pheno, vf.cell.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vf.cell.other.info, by = "Samples") %>%
as_tibble()
vf.cell.pos.covar %>%
select(Samples, Plate:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(Plate, value, color = Plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
vf.cell.pos.covar %>%
select(Samples, Treatment:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(Plate, value)) +
geom_point(size = 3, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vf.cell.pos.covar %>%
select(Samples:Plate) %>%
gather("PC", "value", PC1:PC5) %>%
ggplot(aes(Plate, value)) +
geom_boxplot() +
geom_point(size = 2, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vf.cell.pos.covar %>%
select(PC1:PC5, S1:S2) %>%
ggcorr(label = TRUE)
vf.cell.pos.covar %>%
select(PC1:PC5, S1:S2) %>%
ggpairs()
vf.cell.pos.covar %>%
ggplot(aes(PC2, S1)) +
geom_point(size = 2, alpha = 0.8)
vf.cell.pos.covar %>%
select(PC1:PC5, run_order:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.cell.pos.covar %>%
select(PC1:PC5, run_order:prot_conc_ug_uL) %>%
ggpairs()
vf.cell.pos.covar %>%
ggplot(aes(run_order, PC2, color = Plate)) +
geom_point(size = 4, alpha = 0.8)
vf.cell.pos.covar %>%
ggplot(aes(run_order, PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8)
vf.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
vf.cell.pos.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.cell.pos.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggpairs()
vf.cell.pos.covar %>%
ggplot(aes(run_order, S1, color = Treatment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vf.cell.pos.mod.vf) <- c("cntrl", "fa_only", "vpa_only", "fa_and_vpa")
# combine the full model matrix and the surrogate variables into one
vf.cell.pos.design.sv <- cbind(vf.cell.pos.mod.vf, vf.cell.pos.surr.var)
vf.cell.pos.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("cntrl", "fa_only", "vpa_only", "fa_and_vpa", "S1", "S2")
)
# fit the model/design matrix
vf.cell.pos.eb <- vf.cell.pos.data.for.sva %>%
lmFit(vf.cell.pos.design.sv) %>%
contrasts.fit(vf.cell.pos.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.cell.pos.eb.tidy <- tidy(vf.cell.pos.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.cell.pos.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlim(-2.5, 2.5) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / Cells / Positive Mode")
# select statistically significant hits with a certain FC:
vf.cell.pos.hits <- vf.cell.pos.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.cell.pos.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.cell.pos.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 24 28
# significant metabolites
sort(unique(vf.cell.pos.hits$compound_full))
[1] "3-Phosphoglyceroinositol"
[2] "Adenine"
[3] "Adenosine"
[4] "ADP-Glucose"
[5] "AMP"
[6] "Aspartic Acid"
[7] "ATP"
[8] "Beta-Alanine"
[9] "CMP"
[10] "CoA"
[11] "CTP"
[12] "D-Galactitol"
[13] "Dihydrobiopterin"
[14] "Fucose"
[15] "G3P"
[16] "GalNAc"
[17] "Glycerol-3-phosphocholine"
[18] "Glycerol 1-Phosphate / Glycerol 3-Phosphate"
[19] "Glycerol 1-phosphoserine"
[20] "GMP"
[21] "Guanine"
[22] "Guanosine"
[23] "Hypoxanthine"
[24] "Inosine"
[25] "N-Carbamoyl-L-aspartic Acid"
[26] "NADH"
[27] "NMN"
[28] "PAF C18"
[29] "Proline"
[30] "UMP"
[31] "Uracil"
[32] "UTP"
vf.cell.pos.hits.tally2 <- vf.cell.pos.hits %>%
group_by(compound_short, compound_full) %>%
count() %>%
filter(n == 2)
vf.cell.pos.lowFA.hits <- vf.cell.pos.hits %>%
filter(term == "vpa_lowFA" & !(compound_short %in% vf.cell.pos.hits.tally2$compound_short))
vf.cell.pos.highFA.hits <- vf.cell.pos.hits %>%
filter(term == "vpa_highFA" & !(compound_short %in% vf.cell.pos.hits.tally2$compound_short))
vf.cell.pos.both.hits <- vf.cell.pos.hits %>%
filter(compound_short %in% vf.cell.pos.hits.tally2$compound_short) %>%
arrange(compound_short, term)
vf.cell.pos.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
ggplot(aes(vpa_lowFA, vpa_highFA)) +
geom_point(size = 2, alpha = 0.8) +
geom_abline(intercept = 0, slope = 1, color = "blue", alpha = 0.8)
vf.cell.pos.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
mutate(diff = vpa_highFA - vpa_lowFA) %>%
arrange(desc(diff))
# A tibble: 20 x 4
compound_full vpa_highFA vpa_lowFA diff
<chr> <dbl> <dbl> <dbl>
1 ADP-Glucose 4.08 3.35 0.726
2 Dihydrobiopterin 2.81 2.59 0.220
3 Proline 1.64 1.45 0.188
4 GMP 0.590 0.515 0.0755
5 Guanine 0.460 0.416 0.0438
6 Glycerol 1-phosphoserine 0.748 0.722 0.0260
7 Glycerol-3-phosphocholine 0.224 0.214 0.0101
8 CMP 0.549 0.539 0.00911
9 Adenosine 0.434 0.433 0.000522
10 Guanosine 0.460 0.472 -0.0116
11 NMN 0.401 0.418 -0.0168
12 UMP 0.548 0.577 -0.0298
13 Hypoxanthine 0.501 0.533 -0.0317
14 3-Phosphoglyceroinositol 0.743 0.775 -0.0325
15 AMP 0.496 0.530 -0.0336
16 Inosine 0.516 0.566 -0.0504
17 GalNAc 0.575 0.652 -0.0767
18 D-Galactitol 0.470 0.563 -0.0933
19 Fucose 0.463 0.589 -0.126
20 Aspartic Acid 1.32 1.46 -0.140
#write_csv(path = "./results/vf_cell_pos_all_hits.csv", vf.cell.pos.hits)
### Plotting ###
vf.cell.pos.gathered <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(vf.cell.pos.surr.var) %>%
select(Samples, VPA, FA, S1:S2, starts_with("VPA_FApC")) %>%
gather(key = "Compound", value = "Abundance", VPA_FApC10:VPA_FApC99)
vf.cell.pos.nested <- vf.cell.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.cell.pos.modSV.resid <- vf.cell.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
vf.cell.pos.modSV.resid %>%
select(Samples:FA, one_of(unique(vf.cell.pos.hits$compound_short))) %>%
HeatmapPrepAlt("VPA_FApC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA + FA Experiment / Cells / Positive Mode",
margins = c(50, 50, 75, 30),
k_col = 2, k_row = 2,
labRow = unique(vf.cell.pos.hits$compound_full)
)
### PCA - cleaned data ###
vf.cell.pos.modSV.pca <- vf.cell.pos.modSV.resid %>%
select(starts_with("VPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vf.cell.pos.modSV.pca.x <- as.data.frame(vf.cell.pos.modSV.pca$x)
row.names(vf.cell.pos.modSV.pca.x) <- vf.cell.pos.modSV.resid$Samples
vf.cell.pos.modSV.pca.x <- vf.cell.pos.modSV.pca.x %>%
bind_cols(vf.cell.pos.modSV.resid %>% select(VPA:FA))
vf.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (60.6% Var)") +
ylab("PC2 (11.6% Var)")
vf.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.4% Var)") +
ylab("PC4 (4.5% Var)")
# sample prep
vf.med.neg.smpl.data <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.med.neg.data.for.sva <- as.matrix(
vf.med.neg.smpl.data[, which(
colnames(vf.med.neg.smpl.data) == "VPA_FAnM1"
):ncol(vf.med.neg.smpl.data)]
)
row.names(vf.med.neg.data.for.sva) <- vf.med.neg.smpl.data$Samples
vf.med.neg.data.for.sva <- t(vf.med.neg.data.for.sva)
# pheno prep
vf.med.neg.data.pheno <- as.data.frame(vf.med.neg.smpl.data[, 5:6])
row.names(vf.med.neg.data.pheno) <- vf.med.neg.smpl.data$Samples
# sva calculation
vf.med.neg.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.med.neg.data.pheno)
vf.med.neg.mod0 <- model.matrix(~ 1, data = vf.med.neg.data.pheno)
set.seed(2018)
num.sv(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, method = "be")
[1] 1
set.seed(2018)
num.sv(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.med.neg.sv <- sva(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, vf.med.neg.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vf.med.neg.surr.var <- as.data.frame(vf.med.neg.sv$sv)
colnames(vf.med.neg.surr.var) <- c("S1")
vf.med.neg.covar <- vf.med.neg.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vf.med.neg.data.pheno, vf.med.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vf.cell.other.info, by = "Samples") %>%
as_tibble()
vf.med.neg.covar %>%
ggplot(aes(Plate, S1, color = Plate)) +
geom_boxplot() +
geom_jitter(size = 3, alpha = 0.8, width = 0.1)
summary(aov(S1 ~ Plate, data = vf.med.neg.covar))
Df Sum Sq Mean Sq F value Pr(>F)
Plate 3 0.0944 0.03146 0.695 0.566
Residuals 20 0.9056 0.04528
vf.med.neg.covar %>%
ggplot(aes(Plate, S1)) +
geom_point(size = 3, alpha = 0.8, aes(color = Treatment))
vf.med.neg.covar %>%
select(Samples:Plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(Plate, value)) +
geom_boxplot() +
geom_point(size = 2, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vf.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vf.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vf.med.neg.covar %>%
ggplot(aes(PC2, S1)) +
geom_point(size = 2, alpha = 0.8)
vf.med.neg.covar %>%
select(PC1:PC4, run_order:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.med.neg.covar %>%
select(PC1:PC4, run_order:prot_conc_ug_uL) %>%
ggpairs()
vf.med.neg.covar %>%
ggplot(aes(run_order, PC4, color = Plate)) +
geom_point(size = 4, alpha = 0.8)
vf.med.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC1, color = Treatment)) +
geom_point(size = 4, alpha = 0.8)
vf.med.neg.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.med.neg.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggpairs()
colnames(vf.med.neg.mod.vf) <- c("cntrl", "fa_only", "vpa_only", "fa_and_vpa")
# combine the full model matrix and the surrogate variables into one
vf.med.neg.design.sv <- cbind(vf.med.neg.mod.vf, vf.med.neg.surr.var)
vf.med.neg.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("cntrl", "fa_only", "vpa_only", "fa_and_vpa", "S1")
)
# fit the model/design matrix
vf.med.neg.eb <- vf.med.neg.data.for.sva %>%
lmFit(vf.med.neg.design.sv) %>%
contrasts.fit(vf.med.neg.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.med.neg.eb.tidy <- tidy(vf.med.neg.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.med.neg.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / meds / Negative Mode")
# select statistically significant hits with a certain FC:
vf.med.neg.hits <- vf.med.neg.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.med.neg.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.med.neg.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 1 1
# significant metabolites
sort(unique(vf.med.neg.hits$compound_full))
[1] "Valproic acid"
vf.med.neg.hits
# A tibble: 2 x 14
compound_short term estimate statistic p.value lod adj_pval FC
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 VPA_FAnM27 vpa_~ 3.98 45.5 1.62e-22 41.7 2.08e-20 15.8
2 VPA_FAnM27 vpa_~ 3.99 45.4 1.66e-22 41.7 2.14e-20 15.9
# ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
# formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
#write_csv(path = "./results/vf_med_neg_all_hits.csv", vf.med.neg.hits)
### Plotting ###
vf.med.neg.gathered <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vf.med.neg.surr.var) %>%
select(Samples, VPA, FA, S1, starts_with("VPA_FAnM")) %>%
gather(key = "Compound", value = "Abundance", VPA_FAnM1:VPA_FAnM9)
vf.med.neg.nested <- vf.med.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.med.neg.modSV.resid <- vf.med.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
vf.med.neg.modSV.resid %>%
select(Samples:FA, one_of(unique(vf.med.neg.hits$compound_short))) %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Treatment, VPA_FAnM27)) +
geom_boxplot()
# sample prep
vf.med.pos.smpl.data <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.med.pos.data.for.sva <- as.matrix(
vf.med.pos.smpl.data[, which(
colnames(vf.med.pos.smpl.data) == "VPA_FApM1"
):ncol(vf.med.pos.smpl.data)]
)
row.names(vf.med.pos.data.for.sva) <- vf.med.pos.smpl.data$Samples
vf.med.pos.data.for.sva <- t(vf.med.pos.data.for.sva)
# pheno prep
vf.med.pos.data.pheno <- as.data.frame(vf.med.pos.smpl.data[, 5:6])
row.names(vf.med.pos.data.pheno) <- vf.med.pos.smpl.data$Samples
# sva calculation
vf.med.pos.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.med.pos.data.pheno)
vf.med.pos.mod0 <- model.matrix(~ 1, data = vf.med.pos.data.pheno)
set.seed(2018)
num.sv(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, method = "be")
[1] 1
set.seed(2018)
num.sv(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.med.pos.sv <- sva(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, vf.med.pos.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vf.med.pos.surr.var <- as.data.frame(vf.med.pos.sv$sv)
colnames(vf.med.pos.surr.var) <- c("S1")
vf.med.pos.covar <- vf.med.pos.smpl.filt.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vf.med.pos.data.pheno, vf.med.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vf.cell.other.info, by = "Samples") %>%
as_tibble() %>%
filter(!(is.na(Treatment)))
vf.med.pos.covar %>%
ggplot(aes(Plate, S1, color = Plate)) +
geom_boxplot() +
geom_jitter(size = 3, alpha = 0.8, width = 0.1)
summary(aov(S1 ~ Plate, data = vf.med.pos.covar))
Df Sum Sq Mean Sq F value Pr(>F)
Plate 3 0.0968 0.03226 0.679 0.576
Residuals 19 0.9032 0.04754
vf.med.pos.covar %>%
ggplot(aes(Plate, S1)) +
geom_point(size = 3, alpha = 0.8, aes(color = Treatment))
vf.med.pos.covar %>%
select(Samples:Plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(Plate, value)) +
geom_boxplot() +
geom_point(size = 2, alpha = 0.8, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
summary(aov(PC3 ~ Plate, data = vf.med.pos.covar))
Df Sum Sq Mean Sq F value Pr(>F)
Plate 3 3.439 1.1462 2.542 0.0869 .
Residuals 19 8.567 0.4509
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vf.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vf.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vf.med.pos.covar %>%
ggplot(aes(PC1, S1)) +
geom_point(size = 2, alpha = 0.8)
vf.med.pos.covar %>%
ggplot(aes(Treatment, PC1)) +
geom_boxplot()
vf.med.pos.covar %>%
ggplot(aes(PC2, S1)) +
geom_point(size = 2, alpha = 0.8)
vf.med.pos.covar %>%
select(PC1:PC4, run_order:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.med.pos.covar %>%
select(PC1:PC4, run_order:prot_conc_ug_uL) %>%
ggpairs()
vf.med.pos.covar %>%
ggplot(aes(run_order, PC4, color = Plate)) +
geom_point(size = 4, alpha = 0.8)
vf.med.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC1, color = Treatment)) +
geom_point(size = 4, alpha = 0.8)
vf.med.pos.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggcorr(label = TRUE)
vf.med.pos.covar %>%
select(S1:prot_conc_ug_uL) %>%
ggpairs()
colnames(vf.med.pos.mod.vf) <- c("cntrl", "fa_only", "vpa_only", "fa_and_vpa")
# combine the full model matrix and the surrogate variables into one
vf.med.pos.design.sv <- cbind(vf.med.pos.mod.vf, vf.med.pos.surr.var)
vf.med.pos.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("cntrl", "fa_only", "vpa_only", "fa_and_vpa", "S1")
)
# fit the model/design matrix
vf.med.pos.eb <- vf.med.pos.data.for.sva %>%
lmFit(vf.med.pos.design.sv) %>%
contrasts.fit(vf.med.pos.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.med.pos.eb.tidy <- tidy(vf.med.pos.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.med.pos.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / meds / posative Mode")
# select statistically significant hits with a certain FC:
vf.med.pos.hits <- vf.med.pos.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.med.pos.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.med.pos.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 0 0
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] bindrcpp_0.2.2 ggridges_0.5.1 biobroom_1.14.0
[4] broom_0.5.0 limma_3.38.2 sva_3.30.0
[7] BiocParallel_1.16.0 genefilter_1.64.0 mgcv_1.8-26
[10] nlme_3.1-137 heatmaply_0.15.2 viridis_0.5.1
[13] viridisLite_0.3.0 plotly_4.8.0 GGally_1.4.0
[16] cowplot_0.9.3 forcats_0.3.0 stringr_1.3.1
[19] dplyr_0.7.8 purrr_0.2.5 readr_1.2.1
[22] tidyr_0.8.2 tibble_1.4.2 ggplot2_3.1.0
[25] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] colorspace_1.3-2 class_7.3-14 modeltools_0.2-22
[4] mclust_5.4.2 rprojroot_1.3-2 rstudioapi_0.8
[7] flexmix_2.3-14 bit64_0.9-7 fansi_0.4.0
[10] AnnotationDbi_1.44.0 mvtnorm_1.0-8 lubridate_1.7.4
[13] xml2_1.2.0 splines_3.5.1 codetools_0.2-15
[16] robustbase_0.93-3 knitr_1.20 jsonlite_1.5
[19] annotate_1.60.0 cluster_2.0.7-1 kernlab_0.9-27
[22] shiny_1.2.0 compiler_3.5.1 httr_1.3.1
[25] backports_1.1.2 assertthat_0.2.0 Matrix_1.2-15
[28] lazyeval_0.2.1 cli_1.0.1 later_0.7.5
[31] htmltools_0.3.6 tools_3.5.1 gtable_0.2.0
[34] glue_1.3.0 reshape2_1.4.3 Rcpp_1.0.0
[37] Biobase_2.42.0 cellranger_1.1.0 trimcluster_0.1-2.1
[40] gdata_2.18.0 crosstalk_1.0.0 iterators_1.0.10
[43] fpc_2.1-11.1 rvest_0.3.2 mime_0.6
[46] gtools_3.8.1 XML_3.98-1.16 dendextend_1.9.0
[49] DEoptimR_1.0-8 MASS_7.3-51.1 scales_1.0.0
[52] TSP_1.1-6 promises_1.0.1 hms_0.4.2
[55] parallel_3.5.1 RColorBrewer_1.1-2 yaml_2.2.0
[58] memoise_1.1.0 gridExtra_2.3 reshape_0.8.8
[61] stringi_1.2.4 RSQLite_2.1.1 gclus_1.3.1
[64] S4Vectors_0.20.1 foreach_1.4.4 seriation_1.2-3
[67] caTools_1.17.1.1 BiocGenerics_0.28.0 matrixStats_0.54.0
[70] rlang_0.3.0.1 pkgconfig_2.0.2 prabclus_2.2-6
[73] bitops_1.0-6 evaluate_0.12 lattice_0.20-38
[76] bindr_0.1.1 labeling_0.3 htmlwidgets_1.3
[79] bit_1.1-14 tidyselect_0.2.5 plyr_1.8.4
[82] magrittr_1.5 R6_2.3.0 IRanges_2.16.0
[85] gplots_3.0.1 DBI_1.0.0 pillar_1.3.0
[88] haven_2.0.0 whisker_0.3-2 withr_2.1.2
[91] survival_2.43-3 RCurl_1.95-4.11 nnet_7.3-12
[94] modelr_0.1.2 crayon_1.3.4 utf8_1.1.4
[97] KernSmooth_2.23-15 rmarkdown_1.10 grid_3.5.1
[100] readxl_1.1.0 data.table_1.11.8 blob_1.1.1
[103] digest_0.6.18 diptest_0.75-7 webshot_0.5.1
[106] xtable_1.8-3 httpuv_1.4.5 stats4_3.5.1
[109] munsell_0.5.0 registry_0.5